"""Make table showing novels by year and gender."""
import argparse
import functools

import numpy as np
import pandas as pd

# Command-line interface: two required positional paths, plus an optional
# switch that selects the per-decade table instead of the per-year one.
parser = argparse.ArgumentParser()
parser.add_argument(
    'input_filename',
    help='Input csv file with data.',
)
parser.add_argument(
    'output_filename',
    help='Output path for table.',
)
parser.add_argument(
    '--decade',
    action='store_true',
    help='show novels by decade instead of year.',
)


@functools.lru_cache()
def dataset(input_filename):
    """Read the csv file at ``input_filename`` into a DataFrame.

    Results are memoized per filename, so repeated calls with the same
    argument return the very same DataFrame object. Callers that intend to
    modify the result should work on a copy.
    """
    frame = pd.read_csv(input_filename)
    return frame


def make_table_by_year(input_filename, output_filename):
    """Write a LaTeX longtable of novel counts per year and author gender.

    Rows cover every year from 1789 through 1919 (years without any novel
    are shown with zero counts). Columns are the gender codes ``M``, ``F``
    and ``U`` from the ``AUTHOR_GENDER`` column, relabelled for display;
    any other gender code present in the data is left out of the table.

    Args:
        input_filename: Path to the csv file; it must provide the columns
            ``YEAR_OF_FIRST_EDITION`` and ``AUTHOR_GENDER``.
        output_filename: Path the LaTeX source is written to (overwritten
            if it already exists).
    """
    df = dataset(input_filename).copy()
    df = df[['YEAR_OF_FIRST_EDITION', 'AUTHOR_GENDER']]
    df = df.rename(columns={'YEAR_OF_FIRST_EDITION': 'Year', 'AUTHOR_GENDER': 'Gender'})
    df = df.groupby(['Year', 'Gender']).size().unstack()

    # Reindex rows AND columns before filling: a gender code that never
    # occurs in the data would otherwise be introduced as an all-NaN float
    # column after the zero-fill and show up as "NaN" in the table.
    df = df.reindex(index=range(1789, 1919 + 1), columns=['M', 'F', 'U'])
    df = df.fillna(0).astype(int)

    df = df.rename(columns={
        'M': 'Men-authored',
        'F': 'Women-authored',
        'U': 'Unknown',
    })

    # A longtable caption must go inside the longtable environment itself
    # (there is no enclosing `table` environment), so it is spliced in just
    # before the final line of the generated LaTeX.
    longtable_latex = df.to_latex(index=True, longtable=True, escape=False)
    longtable_caption = r"""\caption{\textbf{Novels published between 1789 and 1919 which are still in print.} The table shows counts of novels originally published between 1789 and 1919 available from Broadview Press, Penguin, or Oxford in 2018. Sources: Broadview Press 2018 English Catalogue, Penguin Classics 2016 Catalog, Oxford World's Classics 2016 Catalog.\label{tbl:novels-reprint-canon-by-year}}"""
    longtable_latex_lines = longtable_latex.splitlines()
    with open(output_filename, 'w', encoding='utf-8') as fh:
        fh.write('\n'.join(longtable_latex_lines[:-1]))
        fh.write('\n\n' + longtable_caption + '\n\n')
        fh.write(longtable_latex_lines[-1])


def make_table_by_decade(input_filename, output_filename):
    """Write a LaTeX table of novel counts per decade and author gender.

    Novels first published from 1790 through 1919 are grouped into
    decades (``1790-1799`` ... ``1910-1919``); 1789 is not part of any
    decade and is ignored. Each gender cell shows the count followed by its
    share of that decade's total, the ``N`` column holds the decade total,
    and a final ``All`` row gives the same figures for the whole period.

    Args:
        input_filename: Path to the csv file; it must provide the columns
            ``YEAR_OF_FIRST_EDITION`` and ``AUTHOR_GENDER``.
        output_filename: Path the LaTeX source is written to (overwritten
            if it already exists).
    """
    df = dataset(input_filename).copy()
    df = df[['YEAR_OF_FIRST_EDITION', 'AUTHOR_GENDER']]
    df = df.rename(columns={'YEAR_OF_FIRST_EDITION': 'Year', 'AUTHOR_GENDER': 'Gender'})
    counts = df.groupby(['Year', 'Gender']).size().unstack()

    # Reindex rows AND columns before filling so that a year or a gender
    # code missing from the data becomes a zero count rather than NaN.
    # Starting at 1790 leaves out 1789, which belongs to no decade.
    gender_codes = ['M', 'F', 'U']
    counts = counts.reindex(index=range(1790, 1919 + 1), columns=gender_codes)
    counts = counts.fillna(0).astype(int)

    # Floor each year to the first year of its decade (1804 -> 1800).
    decade_starts = np.asarray(counts.index) // 10 * 10
    counts = counts.groupby(decade_starts).sum()
    row_totals = counts.sum(axis=1)
    column_totals = counts.sum(axis=0)
    grand_total = int(column_totals.sum())

    def format_cell(value, total):
        """Render ``value`` with its share of ``total``, e.g. ``'3 (25%)'``."""
        # A decade (or dataset) without any novels has a zero total; show
        # 0% instead of dividing by zero.
        share = value / total if total else 0
        return f'{value} ({share:.0%})'

    row_total_list = row_totals.tolist()
    table = pd.DataFrame(
        {
            code: [
                format_cell(value, total)
                for value, total in zip(counts[code].tolist(), row_total_list)
            ]
            for code in gender_codes
        },
        index=[f'{decade}-{decade + 9}' for decade in counts.index.tolist()],
    )
    table['N'] = row_total_list

    # Summary row. DataFrame.append no longer exists in pandas 2.x, so the
    # row is attached with pd.concat.
    all_row = {
        code: format_cell(int(column_totals[code]), grand_total)
        for code in gender_codes
    }
    all_row['N'] = grand_total
    table = pd.concat([table, pd.DataFrame([all_row], index=['All'])])
    table['N'] = table['N'].astype(int)

    table = table.rename(columns={
        'M': 'Men-authored',
        'F': 'Women-authored',
        'U': 'Unknown',
    })
    table.index.name = ''
    table.columns.name = ''
    with open(output_filename, 'w', encoding='utf-8') as fh:
        fh.write(table.to_latex(index=True))


if __name__ == '__main__':
    # Parse the command line and dispatch to the requested table builder.
    cli_args = parser.parse_args()
    make_table = make_table_by_decade if cli_args.decade else make_table_by_year
    make_table(cli_args.input_filename, cli_args.output_filename)
